package crawler.binli;

import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.zql.entity.AgencyDao;
import com.zql.entity.AgencyEntity2;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;
import util.Location;
import util.ProCity;

/**
 * WebMagic {@link PageProcessor} for the Bentley dealer-locator page.
 *
 * <p>For every dealer entry found on the page it collects the name, address, sales phone
 * number, coordinates and province/city, fills one {@link AgencyEntity2} with those
 * column lists and hands it to {@link AgencyDao#add}.
 *
 * @author 钟琴隆 E-mail: ai31354907@163.com
 * @version Created 2018-12-06 14:52:47
 */
public class BinLiPageProcessor implements PageProcessor {

	/** XPath of the dealer headline text nodes. */
	private static final String NAME_XPATH =
			"div[@class='by_accordion']/div/div/div/div[@class='by_accordion_headline']/text()";

	/** XPath of the first rich-text paragraph of each dealer (carries the address and phone lines). */
	private static final String DETAIL_XPATH =
			"div[@class='by_accordion']/div/div/div/div/div/div[@class='by_richtext']/p[1]";

	/** Matches everything that follows a full-width colon on a single line. */
	private static final Pattern AFTER_COLON = Pattern.compile("(?<=：).*");

	/** Placeholder stored when a value cannot be determined, so the column lists stay the same length. */
	private static final String BLANK = "";

	/** Request settings (headers, timeout, retries) used by the spider for this site. */
	private final Site site = Site.me()
			.addHeader("Host", "www.bentleymotors.com")
			.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0")
			.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
			.addHeader("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2")
			.addHeader("Accept-Encoding", "gzip, deflate, br")
			.addHeader("Referer", "https://www.bentleymotors.com/cn/zh.html")
			.addHeader("Connection", "keep-alive")
			.addHeader("Upgrade-Insecure-Requests", "1")
			.addHeader("TE", "Trailers")
			.setSleepTime(0)
			.setTimeOut(60000)
			.setCycleRetryTimes(3);

	/**
	 * Extracts the dealer data from the downloaded page and stores it.
	 *
	 * <p>Each detail paragraph is split on {@code <br>}; the second segment is treated as the
	 * address line. Exactly one value per paragraph is appended to every per-dealer list built
	 * in the loop, using an empty string when a value is unavailable, so those lists always
	 * have the same length.
	 *
	 * @param page the page handed over by the spider
	 */
	@Override
	public void process(Page page) {
		List<String> names = page.getHtml().xpath(NAME_XPATH).replace("\\s", "").all();
		List<String> details = page.getHtml().xpath(DETAIL_XPATH).all();

		int count = details.size();
		List<String> addresses = new ArrayList<>(count);
		List<String> latitudes = new ArrayList<>(count);
		List<String> longitudes = new ArrayList<>(count);
		List<String> provinces = new ArrayList<>(count);
		List<String> cities = new ArrayList<>(count);
		// One empty string per dealer; used for the columns this page does not provide.
		List<String> blanks = new ArrayList<>(count);

		for (String detail : details) {
			String[] segments = detail.split("<br>");
			// A paragraph without a second segment has no address line to work with.
			String addressLine = segments.length > 1 ? segments[1] : null;
			addresses.add(extractAddress(addressLine));

			// The whole address line (label included) is passed to the lookup, as before.
			String[] coordinates = addressLine == null ? null : Location.getLocation(addressLine);
			if (hasTwoElements(coordinates)) {
				// Index 0 is stored as longitude and index 1 as latitude.
				String longitude = coordinates[0];
				String latitude = coordinates[1];
				latitudes.add(latitude);
				longitudes.add(longitude);

				String[] region = ProCity.getData(latitude, longitude);
				if (hasTwoElements(region)) {
					provinces.add(region[0]);
					cities.add(region[1]);
				} else {
					provinces.add(BLANK);
					cities.add(BLANK);
				}
			} else {
				latitudes.add(BLANK);
				longitudes.add(BLANK);
				provinces.add(BLANK);
				cities.add(BLANK);
			}
			blanks.add(BLANK);
		}

		// NOTE(review): this list comes from a separate selector chain. A paragraph without a
		// "(+86)" match may contribute no entry, which would leave this list shorter than the
		// others - confirm against the Selectable.regex semantics.
		List<String> saleCalls = page.getHtml().xpath(DETAIL_XPATH)
				.regex("\\(\\+86\\).*")
				.replace("<br> </p>", "")
				.replace("</p>", "")
				.all();

		// Close/opening dates, the numeric brand/dealer/manufacturer ids and the state are not set here.
		AgencyEntity2 agency = new AgencyEntity2();
		agency.setdUpdateTime(new Timestamp(System.currentTimeMillis()));
		agency.setsBrand("宾利");
		agency.setsManufacturer("宾利汽车");
		agency.setsAddress(addresses);
		agency.setsCity(cities);
		agency.setsCustomerServiceCall(blanks);
		agency.setsDealerName(names);
		agency.setsDealerType(blanks);
		agency.setsProvince(provinces);
		agency.setsSaleCall(saleCalls);
		agency.setsLongitude(longitudes);
		agency.setsLatitude(latitudes);
		new AgencyDao().add(agency);
	}

	/**
	 * Returns the text after the first full-width colon of the given line.
	 *
	 * @param addressLine the raw address line, or {@code null} when the paragraph had none
	 * @return the extracted address, or an empty string when the line is missing or has no colon
	 */
	private static String extractAddress(String addressLine) {
		if (addressLine == null) {
			return BLANK;
		}
		Matcher matcher = AFTER_COLON.matcher(addressLine);
		return matcher.find() ? matcher.group() : BLANK;
	}

	/** Tells whether the array is non-null and has at least two elements. */
	private static boolean hasTwoElements(String[] values) {
		return values != null && values.length >= 2;
	}

	/**
	 * Returns the site configuration used for crawling.
	 *
	 * @return the request settings of this processor
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Runs a spider with this processor against the dealer-locator page.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		Spider.create(new BinLiPageProcessor())
				.addUrl("https://www.bentleymotors.com/cn/zh/world-of-bentley/ownership/dealer-locator.html")
				.run();
	}
}
 